1 Set Up

# Record the wall-clock time at which the analysis starts.
# NOTE(review): presumably consumed later to report the total run time; the
# code that reads `start_time` is not visible in this section.
start_time <- Sys.time()
# Packages attached for the whole analysis. The order is kept as-is because
# later packages can mask functions from earlier ones.
library(BiocParallel)
library(ggplot2)
library(GCIMS)
library(dplyr)
library(openxlsx)
library(htmlwidgets)
# Run the project's tool loader. The path is relative, so it is resolved
# against the current working directory.
source("../../load_targetml_tools.R")
## >>> loading tools...
## [1] "/storage/users/tduran/Projects/targetml-gcims-tools/R/baseline_exploratory.R"
## [2] "/storage/users/tduran/Projects/targetml-gcims-tools/R/baseline.R"            
## [3] "/storage/users/tduran/Projects/targetml-gcims-tools/R/cal_curves.R"          
## [4] "/storage/users/tduran/Projects/targetml-gcims-tools/R/optimize_alignment.R"  
## [5] "/storage/users/tduran/Projects/targetml-gcims-tools/R/utils_gcims.R"         
## [6] "/storage/users/tduran/Projects/targetml-gcims-tools/R/voc_identification.R"  
## → Sourcing: /storage/users/tduran/Projects/targetml-gcims-tools/R/baseline_exploratory.R 
## → Sourcing: /storage/users/tduran/Projects/targetml-gcims-tools/R/baseline.R 
## → Sourcing: /storage/users/tduran/Projects/targetml-gcims-tools/R/cal_curves.R 
## → Sourcing: /storage/users/tduran/Projects/targetml-gcims-tools/R/optimize_alignment.R 
## → Sourcing: /storage/users/tduran/Projects/targetml-gcims-tools/R/utils_gcims.R 
## → Sourcing: /storage/users/tduran/Projects/targetml-gcims-tools/R/voc_identification.R

Result Storage

All results are automatically saved in a folder named with the current date and time, following the pattern results_<YYYY-MM-DD_HH-MM>.

## The results will be saved in the folder:  /storage/users/tduran/Projects/targetml-gcims-tools/data/tgn_results/measurements/su/results_2025-06-06_10-36
##             FileName          SampleID   class matrix patient_id control_level
## 1  250519_140105.mea 250519_140105.mea control     SU         NA           MQC
## 2  250519_213542.mea 250519_213542.mea control     SU         NA           LQC
## 3  250520_040523.mea 250520_040523.mea control     SU         NA           HQC
## 4  250520_130631.mea 250520_130631.mea control     SU         NA           MQC
## 5  250520_204110.mea 250520_204110.mea control     SU         NA           LQC
## 6  250521_031056.mea 250521_031056.mea control     SU         NA           HQC
## 7  250521_133931.mea 250521_133931.mea control     SU         NA           MQC
## 8  250522_113050.mea 250522_113050.mea control     SU         NA           LQC
## 9  250522_180032.mea 250522_180032.mea control     SU         NA           HQC
## 10 250522_211844.mea 250522_211844.mea control     SU         NA           MQC
## 11 250523_034828.mea 250523_034828.mea control     SU         NA           LQC
## 12 250523_080815.mea 250523_080815.mea control     SU         NA           HQC
##    day
## 1    4
## 2    4
## 3    4
## 4    5
## 5    5
## 6    5
## 7    6
## 8    6
## 9    6
## 10   7
## 11   7
## 12   7

1.1 1) Dataset creation

# Folder holding the raw measurement files.
samples_directory <- "/storage/projects/TargetML/tgn/all_samples"

# Bare file names (no directory prefix) found in that folder, sorted.
FileName <- samples_directory %>%
  list.files(full.names = FALSE) %>%
  sort()

# Build the GCIMS dataset from the annotations table; on_ram = FALSE asks for
# the samples not to be kept in memory.
# NOTE(review): `annotations` is defined outside this section.
urine <- GCIMSDataset$new(annotations, base_dir = samples_directory, on_ram = FALSE)

# Print a summary of the dataset.
urine
## A GCIMSDataset:
## - With 12 samples
## - Stored on disk (not loaded yet)
## - class, matrix, patient_id, control_level, day
## - No previous history
## - Queued operations:
##   - read_sample:
##       base_dir: /storage/projects/TargetML/tgn/all_samples
##       parser: default
##   - setSampleNamesAsDescription

1.2 2) Pre-process

# 1) Filter (only in Dt): keep the 5-20 ms drift-time window.
# The argument is spelled out in full as `dt_range`, matching the later
# filterDt() call in this file, instead of relying on partial matching of `dt`.
filterDt(urine, dt_range = c(5, 20)) # in ms

# 2) Smooth
# NOTE(review): window lengths are presumably in seconds (retention time) and
# milliseconds (drift time), as the argument names suggest.
smooth(urine, rt_length_s = 3, dt_length_ms = 0.14)

# 3) Decimate
# NOTE(review): a factor of 1 presumably leaves the retention-time axis
# untouched while the drift-time axis is reduced by a factor of 2 -- confirm
# against the GCIMS documentation.
decimate(urine, rt_factor = 1, dt_factor = 2)

1.3 Visualisation of some samples


1.4 3) Pre-alignment

# Index of the sample that both alignment passes use as reference.
reference <- 4

# Pass 1: align with the drift-time and `align_ip` corrections enabled and the
# retention-time method set to "none".
align(
  urine,
  reference_sample_idx = reference,
  align_dt = TRUE,
  align_ip = TRUE,
  method_rt = "none"
)
# NOTE(review): realize() presumably executes the operations queued so far.
urine$realize()

# Pass 2: retention-time alignment only, using the "ptw" method.
# NOTE(review): `ploynomial_order` is reproduced exactly as written; check the
# GCIMS align() documentation before "correcting" the spelling.
align(
  urine,
  reference_sample_idx = reference,
  align_dt = FALSE,
  align_ip = FALSE,
  method_rt = "ptw",
  ploynomial_order = 2
)
urine$realize()

# Filter both axes to the minimum-maximum span currently reported by the
# dataset.
filterDt(urine, dt_range = range(dtime(urine)))
filterRt(urine, rt_range = range(rtime(urine)))

# Interactive RIC and TIS plots with the colour legend hidden.
plot_interactive(plotRIC(urine) + guides(colour = "none"))
plot_interactive(plotTIS(urine) + guides(colour = "none"))

1.5 4) Alignment


1.6 5) findPeaks()


1.7 6) clusterPeaks()

# Cluster the detected peaks across samples with hierarchical clustering on
# euclidean distances.
# NOTE(review): `peak_list` is created in a chunk not shown in this section.
peak_clustering <- clusterPeaks(
  peak_list,
  clustering = list(method = "hclust"),
  distance_method = "euclidean",
  rt_cluster_spread_s = 20,
  dt_cluster_spread_ms = 0.1
)

# Keep the peak list annotated with its cluster assignment.
peak_list_clustered <- peak_clustering$peak_list_clustered

1.7.1 Cluster Interactive Visualisation


1.8 7) integratePeaks()

# Integrate the clustered peaks on every sample of the dataset.
# The list element is referenced by its full name (`peak_list_clustered`, the
# same element extracted right after clusterPeaks()) instead of the shortened
# `peak_list`, which only resolves through `$` partial matching.
integratePeaks(
  urine,
  peak_clustering$peak_list_clustered,
  integration_size_method = "fixed_size",
  rip_saturation_threshold = 0.1
)

# Replace `peak_list` with the peaks now stored in the dataset.
peak_list <- peaks(urine)

1.9 8) peakTable()

# Build the peak table; conflicting peaks are aggregated with max().
peak_table <- peakTable(
  peak_list,
  aggregate_conflicting_peaks = max
)

# Fill the missing entries of the peak table matrix, using the dataset and the
# per-cluster statistics.
peak_table_imputed <- imputePeakTable(
  peak_table$peak_table_matrix, urine, peak_clustering$cluster_stats
)

1.10 9) Baseline Correction

Click here to view the Baseline Correction Report

# Per-cluster statistics produced by the clustering step.
cluster_stats <- peak_clustering$cluster_stats

# Baseline-corrected peak table computed by the project's correctBaseline()
# helper.
# NOTE(review): the meaning and units of `ampliation = 200` are not visible
# here -- confirm against the helper's definition.
peak_table_corrected <- correctBaseline(
  urine, peak_list, cluster_stats,
  ampliation = 200
)

1.11 10) Saving all results